#Althoff IFN data processing

This analysis is to look at why Scribble KO HSC are insensitive to activating signals from interferon. To understand this WT and Scibble KO mice are treated or not with PolyIC an interferon stimulator. There are 4 groups here with WT, KO, WTIC, and KOIC. There are 3 mice with untreated condition and 4 mice per treated condition.

Pre-processing

  1. Adapters were trimmed with Cutadapt
  2. Gene expression was quantified using salmon v1.3.0
  3. TPMs were obtained for the genes using tximport 1.20.0
library(ggplot2)
library(DESeq2)
library(tximport)
library(readr)
library(tximportData)
library(readxl)
library(knitr)
library(tidyverse)
library(pheatmap)
library(RColorBrewer)
library(viridis)
library(ggrepel)
library(EnhancedVolcano)
library(fgsea)
library(limma)
library(VennDiagram)
library(UpSetR)
library(wesanderson)
library(kableExtra)
library(reshape)
library(dplyr)
library(msigdbr)

Load color palettes

Metadata Table
Sample Condition
WT_rep1 WT
WT_rep2 WT
WT_rep3 WT
KO_rep1 KO
KO_rep2 KO
KO_rep3 KO
WTIC_rep1 WTIC
WTIC_rep2 WTIC
WTIC_rep3 WTIC
WTIC_rep4 WTIC
KOIC_rep1 KOIC
KOIC_rep2 KOIC
KOIC_rep3 KOIC
KOIC_rep4 KOIC

We see high correlation between all samples that were treated regardless of phenotype. In the untreated samples there was less correlation this may be due to the mouse variability.

PCA plot of non-normalized data

PC1 vs PC2

This doesn’t seem to match the correlation data which indicated that the treated samples were more similar to each other.

Run Differential Expression testing using DESeq2 and Calculate Gene Set Enrichment

Compare WTvKO, WTvWTIC, KOvKOIC, WTICvKOIC

sig = padj <0.01 and abs(l2fc) >0.5

## [1] TRUE
## [1] TRUE
## using just counts from tximport
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## [1] 4437
## [1] 1423
## [1] 15
## [1] 4481
kable(significant_genes)
comparison sig.genes
WTvsWTIC 4437
WTvKO 1423
WTICvKOIC 15
KOvKOIC 4481
DEG_WTvKO<-as.data.frame(res_WTvKO) %>%
  filter(padj<.01 & abs(log2FoldChange) >= .5)%>%
  rownames_to_column(var="ensembl_gene_id")%>%
  left_join(ens2gene,by="ensembl_gene_id")%>%
  select(Gene, padj, pvalue, log2FoldChange)%>%
  arrange(padj)


DEG_WTvWTIC<-as.data.frame(res_WTvWTIC)%>%
  filter(padj<.01 & abs(log2FoldChange) >= .5)%>%
  rownames_to_column(var="ensembl_gene_id")%>%
  left_join(ens2gene,by="ensembl_gene_id")%>%
  select(Gene, padj, pvalue, log2FoldChange)%>%
  arrange(padj)

DEG_WTICvKOIC<-as.data.frame(res_WTICvKOIC)%>%
  filter(padj<.01 & abs(log2FoldChange) >= .5)%>%
  rownames_to_column(var="ensembl_gene_id")%>%
  left_join(ens2gene,by="ensembl_gene_id")%>%
  select(Gene, padj, pvalue, log2FoldChange)%>%
  arrange(padj)

DEG_KOvKOIC<-as.data.frame(res_KOvKOIC)%>%
  filter(padj<.01 & abs(log2FoldChange) >= .5)%>%
  rownames_to_column(var="ensembl_gene_id")%>%
  left_join(ens2gene,by="ensembl_gene_id")%>%
  select(Gene, padj, pvalue, log2FoldChange)%>%
  arrange(padj)
plotDispEsts(dds.filtered)

Volcano Plots

## Warning: Removed 4870 rows containing missing values (geom_point).

## Warning: Removed 3763 rows containing missing values (geom_point).

## Warning: Removed 3763 rows containing missing values (geom_point).

## Warning: Removed 1479 rows containing missing values (geom_point).

MA Plots

PCA analysis

## Warning: Unknown or uninitialised column: `Mouse_genotype`.

Top 20 DEG plots

top_20_WTvKO<-DEG_WTvKO[1:20, 1]
top_20_WTvWTIC<-DEG_WTvWTIC[1:20, 1]
top_20_KOvKOIC<-DEG_KOvKOIC[1:20, 1]
top_20_WTICvKOIC<-DEG_WTvWTIC[1:20, 1]

normalized_counts <- counts(dds.filtered, normalized=T)
normalized_counts<-as.data.frame(normalized_counts)
normalized_counts<-rownames_to_column(normalized_counts, var="ensembl_gene_id")
normalized_counts<-inner_join(normalized_counts, ens2gene, by="ensembl_gene_id")


top20_norm_counts_WTvKO <- filter(normalized_counts, Gene %in% top_20_WTvKO)%>%
  select(Gene, WT_rep1, WT_rep2, WT_rep3, KO_rep1, KO_rep2, KO_rep3)
## stopped here there is something wrong
melted_top20_norm_counts_WTvKO <- data.frame(melt(top20_norm_counts_WTvKO))
## Using Gene as id variables
colnames(melted_top20_norm_counts_WTvKO)<-c("Gene", "Sample", "normalized_counts")
melted_top20_norm_counts_WTvKO<-full_join(melted_top20_norm_counts_WTvKO,metadata, by="Sample")
melted_top20_norm_counts_WTvKO<-filter(melted_top20_norm_counts_WTvKO, Condition!=c("WTIC", "KOIC"))
ggplot(melted_top20_norm_counts_WTvKO) +
        geom_point(aes(x = Gene, y = normalized_counts, color = Condition)) +
        scale_y_log10() +
        xlab("Genes") +
        ylab("Normalized Counts") +
        ggtitle("Top 20 Significant DE Genes") +
        theme_bw() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    theme(plot.title=element_text(hjust=0.5))
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 4 rows containing missing values (geom_point).

kable(top20_norm_counts_WTvKO)
Gene WT_rep1 WT_rep2 WT_rep3 KO_rep1 KO_rep2 KO_rep3
P2rx5 90.36663 0.00000 0.0000 0.000000 0.000000 0.0000
Ldhc 0.00000 0.00000 0.0000 0.000000 0.000000 100.3481
Gm5884 0.00000 130.99230 0.0000 0.000000 0.000000 0.0000
Mmgt2 822.58968 809.62709 775.7687 244.176446 211.743819 219.8535
Olfr248 0.00000 0.00000 0.0000 186.236272 0.000000 0.0000
Gm3086 0.00000 0.00000 0.0000 0.000000 0.000000 135.0137
Gm14817 93.74482 0.00000 0.0000 0.000000 0.000000 0.0000
Gm11716 95.43392 0.00000 0.0000 0.000000 0.000000 0.0000
Gm17022 96.27846 0.00000 0.0000 0.000000 0.000000 0.0000
Gm9903 0.00000 83.64568 0.0000 0.000000 0.000000 0.0000
Gm28539 0.00000 226.47463 0.0000 0.000000 0.000000 0.0000
Gm37571 0.00000 0.00000 0.0000 0.000000 115.875555 0.0000
Gm38289 0.00000 0.00000 0.0000 0.000000 211.743819 0.0000
Gm19148 0.00000 0.00000 0.0000 0.000000 0.000000 191.5736
1700018A23Rik 0.00000 0.00000 114.0614 0.000000 0.000000 0.0000
Gm43936 247.45254 0.00000 0.0000 0.000000 0.000000 0.0000
Gm44108 0.00000 0.00000 0.0000 0.000000 0.000000 178.8020
Gm45399 121.61490 170.44781 358.0471 4.966301 3.334548 0.0000
Gm49369 0.00000 0.00000 0.0000 137.400983 0.000000 0.0000
Gm47502 0.00000 0.00000 0.0000 0.000000 192.570166 0.0000
ens2gene <- unique(ens2gene)
#pulls the hallmark gene set for mouse
h_gene_sets_mouse = msigdbr(species = "mouse", category = "H")
mouse.hallmark.list = base::split(x = h_gene_sets_mouse$gene_symbol, f = h_gene_sets_mouse$gs_name)

#tidying data

WTvKO_gsea<-results(dds.filtered, contrast=c("Condition", "WT", "KO"),tidy=TRUE)
colnames(WTvKO_gsea)[1]<-"ensembl_gene_id"
WTvKO_gsea<-inner_join(WTvKO_gsea, ens2gene, by="ensembl_gene_id")
WTvKO_gsea_sum<-WTvKO_gsea%>%
  dplyr::select(Gene, stat) %>%
  na.omit() %>%
  distinct() %>%
  group_by(Gene) %>%
  summarise(stat=mean(stat))

rank_posWTvKO<-deframe(WTvKO_gsea_sum)
fgsea_posWTvKO_hallmark<- fgsea(pathways=mouse.hallmark.list, stats=rank_posWTvKO)
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.26% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
fgseaRes_posWTvKO_tidy <-fgsea_posWTvKO_hallmark%>%
  as_tibble() %>%
  arrange(desc(NES))

WTvWTIC_gsea<-results(dds.filtered, contrast=c("Condition", "WT", "WTIC"), tidy=TRUE)
colnames(WTvWTIC_gsea)[1]<-"ensembl_gene_id"
WTvWTIC_gsea<-inner_join(WTvWTIC_gsea, ens2gene, by="ensembl_gene_id")
WTvWTIC_gsea_sum<-WTvWTIC_gsea%>%
  dplyr::select(Gene, stat) %>%
  na.omit() %>%
  distinct() %>%
  group_by(Gene) %>%
  summarise(stat=mean(stat))

rank_posWTvWTIC<-deframe(WTvWTIC_gsea_sum)
fgsea_posWTvWTIC_hallmark<- fgsea(pathways=mouse.hallmark.list, stats=rank_posWTvWTIC)
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.15% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are less
## than 1e-10. You can set the `eps` argument to zero for better estimation.
fgseaRes_posWTvWTIC_tidy <-fgsea_posWTvWTIC_hallmark%>%
  as_tibble() %>%
  arrange(desc(NES))

KOvKOIC_gsea<-results(dds.filtered, contrast=c("Condition","KO", "KOIC"),tidy=TRUE)
colnames(KOvKOIC_gsea)[1]<-"ensembl_gene_id"
KOvKOIC_gsea<-inner_join(KOvKOIC_gsea, ens2gene, by="ensembl_gene_id")
KOvKOIC_gsea_sum<-KOvKOIC_gsea%>%
  dplyr::select(Gene, stat) %>%
  na.omit() %>%
  distinct() %>%
  group_by(Gene) %>%
  summarise(stat=mean(stat))

rank_posKOvKOIC<-deframe(KOvKOIC_gsea_sum)
fgsea_posKOvKOIC_hallmark<- fgsea(pathways=mouse.hallmark.list, stats=rank_posKOvKOIC)
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.13% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : For some pathways, in reality P-values are less than 1e-10. You can set the `eps` argument to zero for better estimation.
fgseaRes_posKOvKOIC_tidy <-fgsea_posKOvKOIC_hallmark%>%
  as_tibble() %>%
  arrange(desc(NES))

WTICvKOIC_gsea<-results(dds.filtered, contrast=c("Condition", "WTIC", "KOIC"),tidy=TRUE)
colnames(WTICvKOIC_gsea)[1]<-"ensembl_gene_id"
WTICvKOIC_gsea<-inner_join(WTICvKOIC_gsea, ens2gene, by="ensembl_gene_id")
WTICvKOIC_gsea_sum<-WTICvKOIC_gsea%>%
  dplyr::select(Gene, stat) %>%
  na.omit() %>%
  distinct() %>%
  group_by(Gene) %>%
  summarise(stat=mean(stat))

rank_posWTICvKOIC<-deframe(WTICvKOIC_gsea_sum)
fgsea_posWTICvKOIC_hallmark<- fgsea(pathways=mouse.hallmark.list, stats=rank_posWTICvKOIC)
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.18% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.

## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : For some pathways, in reality P-values are less than 1e-10. You can set the `eps` argument to zero for better estimation.
fgseaRes_posWTICvKOIC_tidy <-fgsea_posWTICvKOIC_hallmark%>%
  as_tibble() %>%
  arrange(desc(NES))